In [1]:
!pip install pandas plotly dash
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
import plotly.io as pio
pio.renderers.default = "notebook" # or "iframe", "notebook_connected"
from dash import Dash, dcc, html, Input, Output
Requirement already satisfied: pandas in c:\users\vemul\appdata\local\programs\python\python313\lib\site-packages (2.3.2) Requirement already satisfied: plotly in c:\users\vemul\appdata\local\programs\python\python313\lib\site-packages (6.3.0) Requirement already satisfied: dash in c:\users\vemul\appdata\local\programs\python\python313\lib\site-packages (3.2.0) Requirement already satisfied: numpy>=1.26.0 in c:\users\vemul\appdata\local\programs\python\python313\lib\site-packages (from pandas) (2.3.2) Requirement already satisfied: python-dateutil>=2.8.2 in c:\users\vemul\appdata\local\programs\python\python313\lib\site-packages (from pandas) (2.9.0.post0) Requirement already satisfied: pytz>=2020.1 in c:\users\vemul\appdata\local\programs\python\python313\lib\site-packages (from pandas) (2025.2) Requirement already satisfied: tzdata>=2022.7 in c:\users\vemul\appdata\local\programs\python\python313\lib\site-packages (from pandas) (2025.2) Requirement already satisfied: narwhals>=1.15.1 in c:\users\vemul\appdata\local\programs\python\python313\lib\site-packages (from plotly) (2.3.0) Requirement already satisfied: packaging in c:\users\vemul\appdata\local\programs\python\python313\lib\site-packages (from plotly) (25.0) Requirement already satisfied: Flask<3.2,>=1.0.4 in c:\users\vemul\appdata\local\programs\python\python313\lib\site-packages (from dash) (3.1.2) Requirement already satisfied: Werkzeug<3.2 in c:\users\vemul\appdata\local\programs\python\python313\lib\site-packages (from dash) (3.1.3) Requirement already satisfied: importlib-metadata in c:\users\vemul\appdata\local\programs\python\python313\lib\site-packages (from dash) (8.7.0) Requirement already satisfied: typing-extensions>=4.1.1 in c:\users\vemul\appdata\local\programs\python\python313\lib\site-packages (from dash) (4.15.0) Requirement already satisfied: requests in c:\users\vemul\appdata\local\programs\python\python313\lib\site-packages (from dash) (2.32.5) Requirement already satisfied: retrying 
in c:\users\vemul\appdata\local\programs\python\python313\lib\site-packages (from dash) (1.4.2) Requirement already satisfied: nest-asyncio in c:\users\vemul\appdata\local\programs\python\python313\lib\site-packages (from dash) (1.6.0) Requirement already satisfied: setuptools in c:\users\vemul\appdata\local\programs\python\python313\lib\site-packages (from dash) (80.9.0) Requirement already satisfied: blinker>=1.9.0 in c:\users\vemul\appdata\local\programs\python\python313\lib\site-packages (from Flask<3.2,>=1.0.4->dash) (1.9.0) Requirement already satisfied: click>=8.1.3 in c:\users\vemul\appdata\local\programs\python\python313\lib\site-packages (from Flask<3.2,>=1.0.4->dash) (8.2.1) Requirement already satisfied: itsdangerous>=2.2.0 in c:\users\vemul\appdata\local\programs\python\python313\lib\site-packages (from Flask<3.2,>=1.0.4->dash) (2.2.0) Requirement already satisfied: jinja2>=3.1.2 in c:\users\vemul\appdata\local\programs\python\python313\lib\site-packages (from Flask<3.2,>=1.0.4->dash) (3.1.6) Requirement already satisfied: markupsafe>=2.1.1 in c:\users\vemul\appdata\local\programs\python\python313\lib\site-packages (from Flask<3.2,>=1.0.4->dash) (3.0.2) Requirement already satisfied: colorama in c:\users\vemul\appdata\local\programs\python\python313\lib\site-packages (from click>=8.1.3->Flask<3.2,>=1.0.4->dash) (0.4.6) Requirement already satisfied: six>=1.5 in c:\users\vemul\appdata\local\programs\python\python313\lib\site-packages (from python-dateutil>=2.8.2->pandas) (1.17.0) Requirement already satisfied: zipp>=3.20 in c:\users\vemul\appdata\local\programs\python\python313\lib\site-packages (from importlib-metadata->dash) (3.23.0) Requirement already satisfied: charset_normalizer<4,>=2 in c:\users\vemul\appdata\local\programs\python\python313\lib\site-packages (from requests->dash) (3.4.3) Requirement already satisfied: idna<4,>=2.5 in c:\users\vemul\appdata\local\programs\python\python313\lib\site-packages (from requests->dash) (3.10) Requirement 
already satisfied: urllib3<3,>=1.21.1 in c:\users\vemul\appdata\local\programs\python\python313\lib\site-packages (from requests->dash) (2.5.0) Requirement already satisfied: certifi>=2017.4.17 in c:\users\vemul\appdata\local\programs\python\python313\lib\site-packages (from requests->dash) (2025.8.3)
In [2]:
# Read the raw orders CSV, report its dimensions, and preview the first rows.
csv_path = 'Supermart Grocery Sales - Retail Analytics Dataset.csv'
df = pd.read_csv(csv_path)
print(f"Initial Shape: {df.shape}")
df.head()
Initial Shape: (9994, 11)
Out[2]:
| Order ID | Customer Name | Category | Sub Category | City | Order Date | Region | Sales | Discount | Profit | State | |
|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | OD1 | Harish | Oil & Masala | Masalas | Vellore | 11-08-2017 | North | 1254 | 0.12 | 401.28 | Tamil Nadu |
| 1 | OD2 | Sudha | Beverages | Health Drinks | Krishnagiri | 11-08-2017 | South | 749 | 0.18 | 149.80 | Tamil Nadu |
| 2 | OD3 | Hussain | Food Grains | Atta & Flour | Perambalur | 06-12-2017 | West | 2360 | 0.21 | 165.20 | Tamil Nadu |
| 3 | OD4 | Jackson | Fruits & Veggies | Fresh Vegetables | Dharmapuri | 10-11-2016 | South | 896 | 0.25 | 89.60 | Tamil Nadu |
| 4 | OD5 | Ridhesh | Food Grains | Organic Staples | Ooty | 10-11-2016 | South | 2355 | 0.26 | 918.45 | Tamil Nadu |
In [3]:
import pandas as pd
import plotly.express as px
import plotly.io as pio
In [4]:
# Load dataset: re-read the CSV so `df` starts from the untouched file contents.
dataset_file = "Supermart Grocery Sales - Retail Analytics Dataset.csv"
df = pd.read_csv(filepath_or_buffer=dataset_file)
In [5]:
# Convert order date and derive calendar helper columns.
#
# pandas >= 2.0 infers ONE format from the first non-null value and applies it to
# the whole column; with errors='coerce' every row written in a different layout
# is then silently turned into NaT. format='mixed' parses each value on its own
# (month-first, as before), so only genuinely unparseable values become NaT.
# NOTE(review): the preview only shows dash-separated dates; confirm whether the
# file also contains other layouts (e.g. slash-separated).
df['Order Date'] = pd.to_datetime(df['Order Date'], format='mixed', errors='coerce')

# Surface coerced values instead of letting them vanish from later group-bys.
unparsed_dates = int(df['Order Date'].isna().sum())
if unparsed_dates:
    print(f"Warning: {unparsed_dates} 'Order Date' value(s) could not be parsed (NaT)")

df['Year'] = df['Order Date'].dt.year              # NaN where the date is NaT
df['Month'] = df['Order Date'].dt.strftime('%b')   # abbreviated month name, e.g. "Aug"
In [6]:
# Supermart Grocery Color Palette
# Plotly Express cycles through this list when a chart has more colour groups
# than entries. The "Top 7 Cities" chart colours seven groups, so the palette
# needs at least seven distinct colours; otherwise two bars share a colour.
# The first six entries are unchanged, so charts with up to six groups look the same.
supermart_colors = [
    "#2E8B57",  # sea green
    "#FFA500",  # orange
    "#FFD700",  # gold
    "#8B4513",  # saddle brown
    "#FF6347",  # tomato
    "#6B8E23",  # olive drab
    "#4682B4",  # steel blue (added so seven groups stay distinguishable)
]
In [7]:
# Plot 1: Sales by Category
# One bar per category, height = summed Sales, coloured from the Supermart palette.
sales_category = df.groupby("Category", as_index=False)["Sales"].sum()

fig1 = px.bar(
    data_frame=sales_category,
    x="Category",
    y="Sales",
    color="Category",
    title="Total Sales by Category",
    color_discrete_sequence=supermart_colors,
    hover_data={"Sales": ":,.0f"},  # thousands separator, no decimals
)
# Last expression of the cell: update_layout returns the figure, which displays it.
fig1.update_layout(template="plotly_white")
In [8]:
# --- FIXED Plot 2: Monthly Sales Trend (clean syntax) ---
import plotly.express as px
# Make sure the two columns the trend depends on have usable dtypes;
# values that cannot be converted become NaT / NaN.
df["Order Date"] = pd.to_datetime(df["Order Date"], errors="coerce")
df["Sales"] = pd.to_numeric(df["Sales"], errors="coerce")

# Month number drives the ordering; the labels are only for display.
df["MonthNo"] = df["Order Date"].dt.month
month_labels = [
    "Jan", "Feb", "Mar", "Apr", "May", "Jun",
    "Jul", "Aug", "Sep", "Oct", "Nov", "Dec",
]

# Sum sales per calendar month (all years pooled), skipping rows that lack
# either a month or a sales value, then order January -> December.
rows_with_month = df.dropna(subset=["MonthNo", "Sales"])
monthly_sales = rows_with_month.groupby("MonthNo", as_index=False)["Sales"].sum()
monthly_sales = monthly_sales.sort_values("MonthNo")
monthly_sales["Month"] = [
    month_labels[int(month_no) - 1] for month_no in monthly_sales["MonthNo"]
]

# Use the first Supermart colour when the palette exists, else a fixed green.
if "supermart_colors" in globals():
    palette = [supermart_colors[0]]
else:
    palette = ["#2E8B57"]

# Line chart with a marker on every month.
fig2 = px.line(
    data_frame=monthly_sales,
    x="Month",
    y="Sales",
    title="Monthly Sales Trend",
    markers=True,
    color_discrete_sequence=palette,
)
fig2.update_traces(hovertemplate="Month: %{x}<br>Sales: %{y:,}")
fig2.show()
In [9]:
# Plot 3: Sales Distribution by Category (Pie Chart)
# Each slice is a category's share of total Sales, pulled slightly outwards.
sales_category = df.groupby("Category", as_index=False)["Sales"].sum()

# Custom hover text: bold category, formatted sales figure, share of the pie.
pie_hover = "<b>%{label}</b><br>Sales: %{value:,.0f}<br>Percentage: %{percent}"
slice_offsets = [0.05] * len(sales_category)  # same pull for every slice

fig3 = px.pie(
    data_frame=sales_category,
    names="Category",
    values="Sales",
    color="Category",
    title="Sales Distribution by Category",
    color_discrete_sequence=supermart_colors,
    hover_data=["Sales"],
)
fig3.update_traces(
    hovertemplate=pie_hover,
    pull=slice_offsets,
    textinfo="percent+label",
)
fig3.update_layout(template="plotly_white")
fig3.show()
In [10]:
# Plot 4: Top Cities by Sales
# Rank cities by summed Sales and keep the seven largest.
city_totals = df.groupby("City")["Sales"].sum()
top_cities = city_totals.nlargest(7).reset_index()

# NOTE: Plotly Express cycles through the palette when it has fewer colours
# than there are cities to colour.
fig4 = px.bar(
    data_frame=top_cities,
    x="City",
    y="Sales",
    color="City",
    title="Top 7 Cities by Sales",
    color_discrete_sequence=supermart_colors,
    hover_data={"Sales": ":,.0f"},
)
# Tilt the city names so they do not overlap; the returned figure is the cell output.
fig4.update_layout(template="plotly_white", xaxis_tickangle=45)
In [11]:
import plotly.express as px
# Plot A: Profit vs Discount Scatter
# One bubble per order: x = Discount, y = Profit, bubble area = Sales,
# colour = Category.
#
# Sub Category and City are passed as `custom_data` (not `hover_data`) because
# the custom hovertemplate below replaces the template Plotly Express generates;
# with `hover_data` alone those two columns were never displayed.
# `custom_data` puts them in `customdata` in the listed order, so the template
# can reference them by position.
figA = px.scatter(
    df, x="Discount", y="Profit",
    color="Category",
    size="Sales",
    custom_data=["Sub Category", "City"],
    color_discrete_sequence=supermart_colors,
    title="Profit vs Discount by Category (Bubble Size = Sales)"
)
figA.update_traces(
    hovertemplate=(
        "Sub Category: %{customdata[0]}<br>"
        "City: %{customdata[1]}<br>"
        "Discount: %{x}<br>"
        "Profit: %{y}<br>"
        "Sales: %{marker.size:,}"
    )
)
figA.show()
In [12]:
# Plot A (faceted view): the same Profit-vs-Discount bubbles, but each category
# gets its own side-by-side panel; points are semi-transparent so overlap shows.
figA_faceted = px.scatter(
    data_frame=df,
    x="Discount",
    y="Profit",
    color="Category",
    size="Sales",
    facet_col="Category",
    opacity=0.6,
    hover_data=["Sub Category", "City"],
    color_discrete_sequence=supermart_colors,
    title="Profit vs Discount by Category (Faceted View)",
)
figA_faceted.show()
In [13]:
import plotly.express as px
# Plot B: Sales vs Profit density heatmap without the extreme tails.
# Bounds are the 1st and 99th percentiles of each variable (the middle 98%).
sales_min, sales_max = df["Sales"].quantile([0.01, 0.99])
profit_min, profit_max = df["Profit"].quantile([0.01, 0.99])

# Actually drop the tails BEFORE binning. Previously the full data was binned
# and the axes were merely zoomed, so the 40x40 bins were spread over the
# outlier range and the "Outliers Removed" title did not match the data.
within_bounds = (
    df["Sales"].between(sales_min, sales_max)
    & df["Profit"].between(profit_min, profit_max)
)
sales_profit_core = df.loc[within_bounds]

figB = px.density_heatmap(
    sales_profit_core, x="Sales", y="Profit",
    nbinsx=40, nbinsy=40,  # upper bound on the number of bins per axis
    color_continuous_scale="YlGnBu",
    title="Sales vs Profit Density Heatmap (Outliers Removed)"
)
figB.update_layout(
    # Pin the axes to the percentile window so the view matches the filter.
    xaxis=dict(range=[sales_min, sales_max]),
    yaxis=dict(range=[profit_min, profit_max]),
    # The heatmap is coloured through the shared layout colour axis, so the
    # colour-bar title has to be set there; a trace-level colorbar title is ignored.
    coloraxis_colorbar_title_text="Density",
)
figB.update_traces(
    hovertemplate="Sales: %{x}<br>Profit: %{y}<br>Count: %{z}"
)
figB.show()
In [14]:
# Plot C: Sunburst Chart (Category → Sub Category → City)
# Rings go from category (inner) to city (outer); wedge size and colour both
# come from Sales, shaded on a sequential green scale.
hierarchy = ["Category", "Sub Category", "City"]
green_scale = px.colors.sequential.Greens

figC = px.sunburst(
    data_frame=df,
    path=hierarchy,
    values="Sales",
    color="Sales",
    color_continuous_scale=green_scale,
    title="Sales Breakdown (Category → Sub Category → City)",
)
figC.show()
In [15]:
# Plot D: Animated Sales Over Time
# One animation frame per calendar month ("YYYY-MM"), one bar per category.
df["YearMonth"] = df["Order Date"].dt.to_period("M").astype(str)

# Rows whose date could not be parsed are NaT; astype(str) turns them into the
# literal string "NaT", which would otherwise show up as a bogus animation frame.
dated_orders = df.loc[df["Order Date"].notna()]
sales_by_month = dated_orders.groupby(["YearMonth", "Category"])["Sales"].sum()

# Give every month a row for every category (0 where nothing was sold) so each
# frame carries the same set of bars in the same order.
month_category_grid = pd.MultiIndex.from_product(
    sales_by_month.index.levels, names=sales_by_month.index.names
)
time_sales = sales_by_month.reindex(month_category_grid, fill_value=0).reset_index()

# Animated figures keep the axis range computed for the first frame, so fix the
# y-range to the overall maximum (plus headroom) to keep later bars in view.
peak_sales = time_sales["Sales"].max()
y_range = [0, peak_sales * 1.1] if peak_sales > 0 else None

figD = px.bar(
    time_sales, x="Category", y="Sales",
    color="Category",
    animation_frame="YearMonth",
    range_y=y_range,
    color_discrete_sequence=supermart_colors,
    title="Animated Sales by Category Over Time"
)
figD.show()
In [ ]: